[HVM] qemu: Add guest address-space mapping cache.
authorkfraser@localhost.localdomain <kfraser@localhost.localdomain>
Thu, 7 Dec 2006 11:12:52 +0000 (11:12 +0000)
committerkfraser@localhost.localdomain <kfraser@localhost.localdomain>
Thu, 7 Dec 2006 11:12:52 +0000 (11:12 +0000)
On IA32 host or IA32 PAE host, at present, generally, we can't create
an HVM guest with more than 2G memory, because generally it's almost
impossible for Qemu to find a large enough and consecutive virtual
address space to map an HVM guest's whole physical address space.
The attached patch fixes this issue using dynamic mapping based on
little blocks of memory.

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
tools/ioemu/target-i386-dm/cpu.h
tools/ioemu/target-i386-dm/exec-dm.c
tools/ioemu/vl.c
tools/ioemu/vl.h

index dc016edd3e0f9d4839ed3625079c9ea07ec06644..017b8c021b1c22b4b4258787c19dda23beb56fe4 100644 (file)
@@ -25,7 +25,8 @@
 #ifdef TARGET_X86_64
 #define TARGET_LONG_BITS 64
 #else
-#define TARGET_LONG_BITS 32
+/* #define TARGET_LONG_BITS 32 */
+#define TARGET_LONG_BITS 64 /* for Qemu map cache */
 #endif
 
 /* target supports implicit self modifying code */
index be8b280dda6f61df89d15874401d8f1b2f049e5f..755d54777237189b26c828db8a613cb27cf04ebf 100644 (file)
@@ -36,6 +36,7 @@
 
 #include "cpu.h"
 #include "exec-all.h"
+#include "vl.h"
 
 //#define DEBUG_TB_INVALIDATE
 //#define DEBUG_FLUSH
@@ -426,6 +427,12 @@ static inline int paddr_is_ram(target_phys_addr_t addr)
 #endif
 }
 
+#if defined(__i386__) || defined(__x86_64__)
+#define phys_ram_addr(x) (qemu_map_cache(x))
+#elif defined(__ia64__)
+#define phys_ram_addr(x) (phys_ram_base + (x))
+#endif
+
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, 
                             int len, int is_write)
 {
@@ -438,7 +445,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
         l = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK); 
         if (l > len)
             l = len;
-       
+
         io_index = iomem_index(addr);
         if (is_write) {
             if (io_index) {
@@ -460,9 +467,10 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                 }
             } else if (paddr_is_ram(addr)) {
                 /* Reading from RAM */
-                memcpy(phys_ram_base + addr, buf, l);
+                ptr = phys_ram_addr(addr);
+                memcpy(ptr, buf, l);
 #ifdef __ia64__
-                sync_icache((unsigned long)(phys_ram_base + addr), l);
+                sync_icache(ptr, l);
 #endif 
             }
         } else {
@@ -485,7 +493,8 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                 }
             } else if (paddr_is_ram(addr)) {
                 /* Reading from RAM */
-                memcpy(buf, phys_ram_base + addr, l);
+                ptr = phys_ram_addr(addr);
+                memcpy(buf, ptr, l);
             } else {
                 /* Neither RAM nor known MMIO space */
                 memset(buf, 0xff, len); 
index a2700ef31353a50b21daee8b3fc313f1afeed12e..b65f61d8df9c67eea103edf2a82ecc5797dc3c6e 100644 (file)
@@ -5808,6 +5808,92 @@ int set_mm_mapping(int xc_handle, uint32_t domid,
     return 0;
 }
 
+#if defined(__i386__) || defined(__x86_64__)
+static struct map_cache *mapcache_entry;
+static unsigned long nr_buckets;
+
+static int qemu_map_cache_init(unsigned long nr_pages)
+{
+    unsigned long max_pages = MAX_MCACHE_SIZE >> PAGE_SHIFT;
+    int i;
+
+    if (nr_pages < max_pages)
+        max_pages = nr_pages;
+
+    nr_buckets = (max_pages << PAGE_SHIFT) >> MCACHE_BUCKET_SHIFT;
+
+    fprintf(logfile, "qemu_map_cache_init nr_buckets = %lx\n", nr_buckets);
+
+    mapcache_entry = malloc(nr_buckets * sizeof(struct map_cache));
+    if (mapcache_entry == NULL) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    memset(mapcache_entry, 0, nr_buckets * sizeof(struct map_cache));
+
+    /*
+     * To avoid ENOMEM from xc_map_foreign_batch() at runtime, we
+     * pre-fill all the map caches in advance.
+     */
+    for (i = 0; i < nr_buckets; i++)
+       (void)qemu_map_cache(((target_phys_addr_t)i) << MCACHE_BUCKET_SHIFT);
+
+    return 0;
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr)
+{
+    struct map_cache *entry;
+    unsigned long address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
+    unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE-1);
+
+    /* For most cases (>99.9%), the page address is the same. */
+    static unsigned long last_address_index = ~0UL;
+    static uint8_t      *last_address_vaddr;
+
+    if (address_index == last_address_index)
+        return last_address_vaddr + address_offset;
+
+    entry = &mapcache_entry[address_index % nr_buckets];
+
+    if (entry->vaddr_base == NULL || entry->paddr_index != address_index)
+    { 
+        /* We need to remap a bucket. */
+        uint8_t *vaddr_base;
+        unsigned long pfns[MCACHE_BUCKET_SIZE >> PAGE_SHIFT];
+        unsigned int i;
+
+        if (entry->vaddr_base != NULL) {
+            errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+            if (errno) {
+                fprintf(logfile, "unmap fails %d\n", errno);
+                exit(-1);
+            }
+        }
+
+        for (i = 0; i < MCACHE_BUCKET_SIZE >> PAGE_SHIFT; i++)
+            pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-PAGE_SHIFT)) + i;
+
+        vaddr_base = xc_map_foreign_batch(
+            xc_handle, domid, PROT_READ|PROT_WRITE,
+            pfns, MCACHE_BUCKET_SIZE >> PAGE_SHIFT);
+        if (vaddr_base == NULL) {
+            fprintf(logfile, "xc_map_foreign_batch error %d\n", errno);
+            exit(-1);
+        }
+
+        entry->vaddr_base  = vaddr_base;
+        entry->paddr_index = address_index;;
+    }
+
+    last_address_index = address_index;
+    last_address_vaddr = entry->vaddr_base;
+
+    return last_address_vaddr + address_offset;
+}
+#endif
+
 int main(int argc, char **argv)
 {
 #ifdef CONFIG_GDBSTUB
@@ -6130,6 +6216,7 @@ int main(int argc, char **argv)
                 break;
             case QEMU_OPTION_m:
                 ram_size = atol(optarg) * 1024 * 1024;
+                ram_size = (uint64_t)atol(optarg) * 1024 * 1024;
                 if (ram_size <= 0)
                     help();
 #ifndef CONFIG_DM
@@ -6400,50 +6487,41 @@ int main(int argc, char **argv)
         shared_page_nr = nr_pages - 1;
 #endif
 
-    page_array = (xen_pfn_t *)malloc(tmp_nr_pages * sizeof(xen_pfn_t));
-    if (page_array == NULL) {
-        fprintf(logfile, "malloc returned error %d\n", errno);
-        exit(-1);
-    }
-
 #if defined(__i386__) || defined(__x86_64__)
-    for ( i = 0; i < tmp_nr_pages; i++)
-        page_array[i] = i;
 
-    phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
-                                         PROT_READ|PROT_WRITE, page_array,
-                                         tmp_nr_pages);
-    if (phys_ram_base == NULL) {
-        fprintf(logfile, "batch map guest memory returned error %d\n", errno);
+    if ( qemu_map_cache_init(tmp_nr_pages) )
+    {
+        fprintf(logfile, "qemu_map_cache_init returned: error %d\n", errno);
         exit(-1);
     }
 
     shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                       PROT_READ|PROT_WRITE,
-                                       page_array[shared_page_nr]);
+                                       PROT_READ|PROT_WRITE, shared_page_nr);
     if (shared_page == NULL) {
         fprintf(logfile, "map shared IO page returned error %d\n", errno);
         exit(-1);
     }
 
-    fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n",
-            shared_page_nr, (uint64_t)(page_array[shared_page_nr]));
+    fprintf(logfile, "shared page at pfn:%lx\n", shared_page_nr);
 
     buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
                                             PROT_READ|PROT_WRITE,
-                                            page_array[shared_page_nr - 2]);
+                                            shared_page_nr - 2);
     if (buffered_io_page == NULL) {
         fprintf(logfile, "map buffered IO page returned error %d\n", errno);
         exit(-1);
     }
 
-    fprintf(logfile, "buffered io page at pfn:%lx, mfn: %"PRIx64"\n",
-            shared_page_nr - 2, (uint64_t)(page_array[shared_page_nr - 2]));
-
-    free(page_array);
+    fprintf(logfile, "buffered io page at pfn:%lx\n", shared_page_nr - 2);
 
 #elif defined(__ia64__)
-  
+
+    page_array = (xen_pfn_t *)malloc(tmp_nr_pages * sizeof(xen_pfn_t));
+    if (page_array == NULL) {
+        fprintf(logfile, "malloc returned error %d\n", errno);
+        exit(-1);
+    }
+
     if (xc_ia64_get_pfn_list(xc_handle, domid, page_array,
                              IO_PAGE_START >> PAGE_SHIFT, 3) != 3) {
         fprintf(logfile, "xc_ia64_get_pfn_list returned error %d\n", errno);
index 3df963241c95d18d828c3a9f48f83211c08c4a30..9e4d92a17b2a9a145796a32a8743e678f3dcaa80 100644 (file)
@@ -156,6 +156,26 @@ extern void *shared_vram;
 
 extern FILE *logfile;
 
+
+#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) 
+#define MAX_MCACHE_SIZE    0x40000000 /* 1GB max for x86 */
+#define MCACHE_BUCKET_SHIFT 16
+#elif defined(__x86_64__)
+#define MAX_MCACHE_SIZE    0x1000000000 /* 64GB max for x86_64 */
+#define MCACHE_BUCKET_SHIFT 20
+#endif
+
+#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
+
+struct map_cache {
+    unsigned long paddr_index;
+    uint8_t      *vaddr_base;
+};
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr);
+#endif
+
 extern int xc_handle;
 extern int domid;